Load necessary packages
library(caret) # Classification and Regression Training
## Warning: package 'caret' was built under R version 3.5.2
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2
library(corrplot)
## corrplot 0.84 loaded
library(dplyr) # A Grammar of Data Manipulations
## Warning: package 'dplyr' was built under R version 3.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats) # Tools for Working with Categorical Variables (Factors)
## Warning: package 'forcats' was built under R version 3.5.2
library(gdata) # Various R Programming Tools for Data Manipulation
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
library(ggplot2) # Create Elegant Data Visualizations Using the Grammar of Graphics
library(ggrepel) # Automatically Position Non-Overlapping Text Labels with ‘ggplot2’
## Warning: package 'ggrepel' was built under R version 3.5.2
library(ggridges) # Ridgeline Plots in ‘ggplot2’
##
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
##
## scale_discrete_manual
library(ggthemes) # Extra Themes, Scales, and Geoms for ‘ggplot2’
## Warning: package 'ggthemes' was built under R version 3.5.2
library(gmodels) # Various R Programming Tools for Model Fitting
library(grid) # The Grid Graphics Package
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:gdata':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
library(knitr) # A General-Purpose Package for Dynamic Report Generation in R
## Warning: package 'knitr' was built under R version 3.5.2
library(lmtest) # Testing Linear Regression Models
## Warning: package 'lmtest' was built under R version 3.5.2
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(plyr) # Tools for Splitting, Applying and Combining Data
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(purrr) # Functional Programming Tools
## Warning: package 'purrr' was built under R version 3.5.2
##
## Attaching package: 'purrr'
## The following object is masked from 'package:plyr':
##
## compact
## The following object is masked from 'package:gdata':
##
## keep
## The following object is masked from 'package:caret':
##
## lift
library(readr) # Read Rectangular Text Data
library(rpart)
## Warning: package 'rpart' was built under R version 3.5.2
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(scales) # Scale Functions for Visualization
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
## The following object is masked from 'package:purrr':
##
## discard
library(shiny) # Web Application Framework for R
## Warning: package 'shiny' was built under R version 3.5.2
library(skimr) # Compact and Flexible Summaries of Data
## Warning: package 'skimr' was built under R version 3.5.2
##
## Attaching package: 'skimr'
## The following object is masked from 'package:knitr':
##
## kable
## The following object is masked from 'package:stats':
##
## filter
library(stringr) # Simple, Consistent Wrappers for Common String Operations
library(tibble) # Simple Data Frames
## Warning: package 'tibble' was built under R version 3.5.2
library(tidyr) # Easily Tidy Data with ‘spread()’ and ‘gather()’ Functions
## Warning: package 'tidyr' was built under R version 3.5.2
library(tidyverse) # Install and Load ‘Tidyverse’ Packages
library(usmap)
## Warning: package 'usmap' was built under R version 3.5.2
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.5.2
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
## The following object is masked from 'package:plyr':
##
## ozone
library(mapdata)
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.5.2
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggmap':
##
## theme_nothing
## The following object is masked from 'package:ggthemes':
##
## theme_map
library("googleway")
library("ggspatial")
library("rnaturalearth")
library("rnaturalearthdata")
theme_set(theme_bw())
library("sf")
## Warning: package 'sf' was built under R version 3.5.2
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(gridExtra)
world <- ne_countries(scale = "medium", returnclass = "sf") #for mapping
Load cleaned data
clean_data <- readRDS("../../data/processed_data/processeddata.rds")
skim(clean_data)
## Skim summary statistics
## n obs: 13165
## n variables: 23
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## bioregion 0 13165 13165 6
## georegion 0 13165 13165 9
## group_code 0 13165 13165 9
## group_code_UCSC_other 0 13165 13165 2
## island 0 13165 13165 6
## marine_season_code 0 13165 13165 57
## marine_site_name 0 13165 13165 77
## method_code 0 13165 13165 4
## method_code_IP_other 0 13165 13165 2
## mpa_region 0 13165 13165 6
## season_name 0 13165 13165 4
## site_code 0 13165 13165 77
## species_code 0 13165 13165 3
## state 0 13165 13165 4
## top_counts ordered
## San: 5608, Oly: 4138, Gov: 2366, Sal: 652 FALSE
## CA : 5382, CA : 2592, CA : 1660, OR: 1386 FALSE
## UCS: 9657, UCL: 2032, PBN: 400, SSS: 366 FALSE
## UCS: 9657, Oth: 3508, NA: 0 FALSE
## Mai: 12364, Bar: 279, Sad: 250, Hat: 150 FALSE
## SU1: 440, SU1: 403, SU1: 392, FA1: 376 FALSE
## Mil: 524, Sco: 513, Sti: 472, End: 468 FALSE
## IP: 12012, BT2: 782, GSE: 336, TS3: 35 FALSE
## IP: 12012, Oth: 1153, NA: 0 FALSE
## Cen: 5382, Sou: 2627, NUL: 1932, Nor: 1660 FALSE
## Sum: 4552, Fal: 4489, Spr: 4112, Win: 12 FALSE
## MCR: 524, SCT: 513, SWC: 472, END: 468 FALSE
## P.o: 11416, K FALSE
## CA: 10548, OR: 1386, WA: 865, AK: 366 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25
## marine_common_season 0 13165 13165 116.82 19.23 78 101
## marine_common_year 0 13165 13165 2009.7 4.81 2000 2006
## marine_sort_order 0 13165 13165 5566.24 1125.15 1720 5020
## season_sequence 0 13165 13165 2.03 0.81 1 1
## size_bin 0 13165 13165 82.44 42.2 5 50
## size_sort_order 0 13165 13165 9.57 4.15 1 6
## total 0 13165 13165 9.92 16.43 1 2
## p50 p75 p100 hist
## 118 131 152 ▅▅▆▇▇▇▇▅
## 2010 2013 2018 ▃▅▅▇▇▆▆▆
## 6090 6370 7650 ▁▁▁▂▃▃▇▁
## 2 3 4 ▇▁▇▁▁▇▁▁
## 80 110 320 ▅▇▇▃▁▁▁▁
## 10 12 33 ▅▇▇▃▁▁▁▁
## 4 11 360 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## latitude 0 13165 13165 38.67 5.2 32.87 34.73 36.62
## longitude 0 13165 13165 -122.25 2.86 -135.38 -123.98 -121.94
## p75 p100 hist
## 41.65 57.05 ▇▆▃▂▁▁▁▁
## -120.62 -117.25 ▁▁▁▁▅▇▆▃
clean_data %>% ggplot() +
geom_histogram(aes(total))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
unique_sites <- subset(clean_data, !duplicated(clean_data$site_code))
#names(unique_sites)
sites_coordinates <- unique_sites[c(2,5:6,23)]
sites_coordinates_contig <- filter(sites_coordinates, sites_coordinates$state != "Alaska")
#sites_coordinates
CA_coordinates <- filter(sites_coordinates, sites_coordinates$state == "CA")
OR_coordinates <- filter(sites_coordinates, sites_coordinates$state == "OR")
WA_coordinates <- filter(sites_coordinates, sites_coordinates$state == "WA")
AK_coordinates <- filter(sites_coordinates, sites_coordinates$state == "AK")
CA_coordinates <- as.data.frame(CA_coordinates)
OR_coordinates <- as.data.frame(OR_coordinates)
WA_coordinates <- as.data.frame(WA_coordinates)
AK_coordinates <- as.data.frame(AK_coordinates)
After messing around with several different mapping tools in R, I decided that trying to display the whole range (CA to AK) on one map would be really hard, and the Alaska sites are really just at one specific location, so I’m going to split Alaska off from the three states that are part of the contiguous US
Alaska:
world <- ne_countries(scale = "medium", returnclass = "sf")
AK_map <- ggplot(data = world) +
geom_sf() +
geom_point(data= sites_coordinates,aes(x=sites_coordinates$longitude, y=sites_coordinates$latitude,), size = 4, alpha = 0.4, color ="orange") +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-168, -131), ylim = c(51, 73), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -151, y = 66, label = "ALASKA", size = 600)) +
geom_text(aes(x = -149.9, y = 61.4, label = "*", size = 600)) +
geom_text(aes(x = -154, y = 62.6, label = "Anchorage")) +
scale_x_continuous(breaks = c(-160, -150, -140)) +
theme(legend.position = "none")
AK_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/AK_sites_map.png",plot = AK_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
State <- sites_coordinates_contig$state
CA_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-126, -116), ylim = c(32, 42.4), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")+
geom_text(aes(x = -122.7, y = 37.7, label = ">")) +
geom_text(aes(x = -123.5, y = 37.6, label = "San ")) +
geom_text(aes(x = -124.2, y = 37.1, label = "Francisco")) +
geom_text(aes(x = -120, y = 39, label = "CALIFORNIA")) +
geom_text(aes(x = -118.9, y = 33.7, label = ">")) +
geom_text(aes(x = -120.9, y = 33.6, label = "Los Angeles")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-124, -122, -120, -118))
CA_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/CA_sites_map.png",plot = CA_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
OR_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "br", width_hint = 0.3) +
coord_sf(xlim = c(-127.5, -120.8), ylim = c(41.5, 47.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -122.4, y = 44, label = "OREGON")) +
geom_text(aes(x = -122.8, y = 45.6, label = "*")) +
geom_text(aes(x = -122.8, y = 45.4, label = "Portland")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-126, -124, -122))
OR_map
## Scale on map varies by more than 10%, scale bar may be inaccurate
ggsave(filename = "../../results/OR_sites_map.png",plot = OR_map, width = 3.5, height = 4)
## Scale on map varies by more than 10%, scale bar may be inaccurate
WA_map <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "br", width_hint = 0.3) +
coord_sf(xlim = c(-125.2, -121.3), ylim = c(45.8, 49.1), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
geom_text(aes(x = -122.5, y = 46.7, label = "WASHINGTON")) +
geom_text(aes(x = -123.7, y = 47.93, label = "Olympic")) +
geom_text(aes(x = -123.7, y = 47.75, label = "Peninsula")) +
geom_text(aes(x = -121.9, y = 47.75, label = "< Seattle")) +
theme(legend.position = "none") +
scale_x_continuous(breaks = c(-124, -123, -122))
WA_map
ggsave(filename = "../../results/WA_sites_map.png",plot = WA_map, width = 3.5, height = 4)
ggplot(data = world) +
geom_sf() +
geom_jitter(data= sites_coordinates_contig,aes(x=sites_coordinates_contig$longitude, y=sites_coordinates_contig$latitude,col=State), size = 3, alpha = 0.6, width = 0.2) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-128, -115), ylim = c(32, 49.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")+ geom_text(aes(x = -121, y = 38, label = "< San"))
## Scale on map varies by more than 10%, scale bar may be inaccurate
CA <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "CA")
summary(CA)
## marine_site_name marine_common_year size_bin
## Mill Creek : 524 Min. :2000 Min. : 5.00
## Scott Creek : 513 1st Qu.:2005 1st Qu.: 50.00
## Stillwater : 472 Median :2009 Median : 80.00
## Enderts : 468 Mean :2009 Mean : 81.92
## Terrace Point: 447 3rd Qu.:2013 3rd Qu.:110.00
## Andrew Molera: 443 Max. :2018 Max. :230.00
## (Other) :7681
## total state
## Min. : 1.000 AK: 0
## 1st Qu.: 2.000 CA:10548
## Median : 5.000 OR: 0
## Mean : 9.607 WA: 0
## 3rd Qu.: 11.000
## Max. :360.000
##
OR <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "OR")
summary(OR)
## marine_site_name marine_common_year size_bin
## Fogarty Creek:351 Min. :2000 Min. : 5.00
## Bob Creek :334 1st Qu.:2005 1st Qu.: 50.00
## Burnt Hill :260 Median :2010 Median : 80.00
## Ecola :228 Mean :2010 Mean : 87.21
## Cape Arago :213 3rd Qu.:2014 3rd Qu.:120.00
## Alegria : 0 Max. :2018 Max. :230.00
## (Other) : 0
## total state
## Min. : 1.00 AK: 0
## 1st Qu.: 2.00 CA: 0
## Median : 7.00 OR:1386
## Mean : 15.28 WA: 0
## 3rd Qu.: 18.00
## Max. :212.00
##
WA <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "WA")
summary(WA)
## marine_site_name marine_common_year size_bin
## Saddlebag North Cove:163 Min. :2008 Min. : 5.00
## Post Point :151 1st Qu.:2011 1st Qu.: 50.00
## Hat Island West :116 Median :2014 Median : 90.00
## Point Grenville :111 Mean :2014 Mean : 89.26
## Kydikabbit Point :102 3rd Qu.:2016 3rd Qu.:120.00
## Saddlebag South East: 87 Max. :2018 Max. :320.00
## (Other) :135
## total state
## Min. : 1.000 AK: 0
## 1st Qu.: 1.000 CA: 0
## Median : 2.000 OR: 0
## Mean : 6.776 WA:865
## 3rd Qu.: 5.000
## Max. :133.000
##
AK <- clean_data %>% select(marine_site_name, marine_common_year, size_bin, total, state) %>%
filter(state == "AK")
summary(AK)
## marine_site_name marine_common_year size_bin
## Sage Rock :148 Min. :2011 Min. : 5.00
## Pirates Cove :131 1st Qu.:2013 1st Qu.: 40.00
## Kayak Island : 87 Median :2015 Median : 60.00
## Alegria : 0 Mean :2015 Mean : 63.51
## Andrew Molera: 0 3rd Qu.:2017 3rd Qu.: 87.50
## Arroyo Hondo : 0 Max. :2018 Max. :190.00
## (Other) : 0
## total state
## Min. : 1.00 AK:366
## 1st Qu.: 1.00 CA: 0
## Median : 3.00 OR: 0
## Mean : 6.03 WA: 0
## 3rd Qu.: 7.00
## Max. :117.00
##
[INSERT TABLE SUMMARIZING THIS INFORMATION]
clean_data %>% ggplot(aes(marine_common_year, total)) + geom_jitter(width = 0.15, alpha = 0.25)
clean_data %>% ggplot(aes(marine_common_year, total, col=state)) + geom_jitter(width = 0.15, alpha = 0.25)
Woah, California is dominating this visualization.
clean_data %>%
ggplot(aes(marine_common_year, total, col=state)) + geom_jitter(width = 0.15, alpha = 0.25) +
facet_wrap(~state)
Washington and Alaska clearly weren’t surveyed for all years during which California and Oregon were.
clean_data %>%
ggplot(aes(marine_common_year, log(total), col=state)) + geom_jitter(width = 0.15, alpha = 0.25) +
facet_wrap(~state)
clean_data %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot()
P.ochra_only <- filter(clean_data, species_code == "P.ochraceus")
K.tuni_only <- filter(clean_data, species_code == "K.tunicata")
E.tros_only <- filter(clean_data, species_code == "E.troschelii")
skim(P.ochra_only)
## Skim summary statistics
## n obs: 11416
## n variables: 23
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## bioregion 0 11416 11416 6
## georegion 0 11416 11416 9
## group_code 0 11416 11416 9
## group_code_UCSC_other 0 11416 11416 2
## island 0 11416 11416 6
## marine_season_code 0 11416 11416 57
## marine_site_name 0 11416 11416 77
## method_code 0 11416 11416 4
## method_code_IP_other 0 11416 11416 2
## mpa_region 0 11416 11416 6
## season_name 0 11416 11416 4
## site_code 0 11416 11416 77
## species_code 0 11416 11416 1
## state 0 11416 11416 4
## top_counts ordered
## San: 5086, Oly: 3232, Gov: 2366, Sal: 492 FALSE
## CA : 4860, CA : 2592, CA : 1271, OR: 1138 FALSE
## UCS: 8284, UCL: 2032, PBN: 291, CSU: 283 FALSE
## UCS: 8284, Oth: 3132, NA: 0 FALSE
## Mai: 10885, Sad: 192, Bar: 151, Hat: 99 FALSE
## FA1: 323, FA0: 313, SP0: 313, FA1: 312 FALSE
## Sco: 453, Ter: 446, Haz: 404, Mil: 404 FALSE
## IP: 10263, BT2: 782, GSE: 336, TS3: 35 FALSE
## IP: 10263, Oth: 1153, NA: 0 FALSE
## Cen: 4860, Sou: 2627, NUL: 1414, Nor: 1271 FALSE
## Fal: 4157, Spr: 3786, Sum: 3461, Win: 12 FALSE
## SCT: 453, TPT: 446, HAZ: 404, MCR: 404 FALSE
## P.o: 11416, E FALSE
## CA: 9433, OR: 1138, WA: 640, AK: 205 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25
## marine_common_season 0 11416 11416 114.4 19.14 78 99
## marine_common_year 0 11416 11416 2009.09 4.79 2000 2005
## marine_sort_order 0 11416 11416 5682.02 1048.93 1720 5050
## season_sequence 0 11416 11416 2.03 0.84 1 1
## size_bin 0 11416 11416 86.83 42.67 5 50
## size_sort_order 0 11416 11416 9.68 4.27 1 6
## total 0 11416 11416 10.85 17.41 1 2
## p50 p75 p100 hist
## 114 130 152 ▅▅▆▇▆▆▅▃
## 2009 2013 2018 ▃▅▅▇▆▆▃▅
## 6150 6390 7650 ▁▁▁▂▃▂▇▁
## 2 3 4 ▇▁▆▁▁▇▁▁
## 90 120 320 ▅▇▇▅▁▁▁▁
## 10 13 33 ▅▇▇▅▁▁▁▁
## 5 13 360 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## latitude 0 11416 11416 38.13 4.82 32.87 34.55 36.51
## longitude 0 11416 11416 -121.94 2.57 -135.38 -123.51 -121.91
## p75 p100 hist
## 40.34 57.05 ▇▅▂▂▁▁▁▁
## -120.61 -117.25 ▁▁▁▁▅▇▇▅
skim(K.tuni_only)
## Skim summary statistics
## n obs: 1676
## n variables: 23
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## bioregion 0 1676 1676 4
## georegion 0 1676 1676 7
## group_code 0 1676 1676 6
## group_code_UCSC_other 0 1676 1676 2
## island 0 1676 1676 5
## marine_season_code 0 1676 1676 27
## marine_site_name 0 1676 1676 46
## method_code 0 1676 1676 1
## method_code_IP_other 0 1676 1676 1
## mpa_region 0 1676 1676 5
## season_name 0 1676 1676 3
## site_code 0 1676 1676 46
## species_code 0 1676 1676 1
## state 0 1676 1676 4
## top_counts ordered
## Oly: 906, San: 522, Sal: 126, AK_: 122 FALSE
## CA : 522, CA : 389, OR: 248, CA : 204 FALSE
## UCS: 1371, SSS: 122, PBN: 94, OCN: 65 FALSE
## UCS: 1371, Oth: 305, NA: 0 FALSE
## Mai: 1460, Bar: 93, Hat: 48, Sad: 46 FALSE
## SU1: 131, SU1: 131, SU1: 129, SU1: 128 FALSE
## Fal: 144, Mil: 120, Sti: 110, End: 105 FALSE
## IP: 1676, BT2: 0, GSE: 0, TS3: 0 FALSE
## IP: 1676, Oth: 0, NA: 0 FALSE
## Cen: 522, NUL: 445, Nor: 389, Nor: 204 FALSE
## Sum: 1023, Fal: 331, Spr: 322, Win: 0 FALSE
## FKC: 144, MCR: 120, SWC: 110, END: 105 FALSE
## K.t: 1676, E FALSE
## CA: 1115, OR: 248, WA: 191, AK: 122 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## marine_common_season 0 1676 1676 132.29 10.05 114 123 133
## marine_common_year 0 1676 1676 2013.57 2.54 2009 2011 2014
## marine_sort_order 0 1676 1676 4911.32 1220.64 1720 4400 5050
## season_sequence 0 1676 1676 2.01 0.62 1 2 2
## size_bin 0 1676 1676 53.79 23.85 5 35 50
## size_sort_order 0 1676 1676 8.95 3.05 1 7 9
## total 0 1676 1676 3.9 3.26 1 1 3
## p75 p100 hist
## 141 150 ▆▇▅▆▇▇▅▆
## 2016 2018 ▇▇▅▆▇▇▆▇
## 6130 6310 ▂▁▁▁▂▇▁▆
## 2 3 ▂▁▁▇▁▁▁▂
## 70 130 ▃▃▇▅▇▂▁▁
## 11 17 ▂▂▃▇▇▅▁▁
## 5 19 ▇▂▂▁▁▁▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## latitude 0 1676 1676 41.73 5.77 35.45 36.56 40.34
## longitude 0 1676 1676 -124.07 3.33 -135.38 -124.14 -123.73
## p75 p100 hist
## 44.24 57.05 ▇▅▆▂▃▁▁▂
## -121.95 -120.95 ▁▁▁▁▁▁▇▆
skim(E.tros_only)
## Skim summary statistics
## n obs: 73
## n variables: 23
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## bioregion 0 73 73 2
## georegion 0 73 73 2
## group_code 0 73 73 5
## group_code_UCSC_other 0 73 73 2
## island 0 73 73 5
## marine_season_code 0 73 73 10
## marine_site_name 0 73 73 10
## method_code 0 73 73 1
## method_code_IP_other 0 73 73 1
## mpa_region 0 73 73 1
## season_name 0 73 73 3
## site_code 0 73 73 10
## species_code 0 73 73 1
## state 0 73 73 2
## top_counts ordered
## AK_: 39, Sal: 34, Cha: 0, Gov: 0 FALSE
## AK: 39, WA : 34, CA : 0, CA : 0 FALSE
## SSS: 39, UW: 16, PBN: 15, UCS: 2 FALSE
## Oth: 71, UCS: 2, NA: 0 FALSE
## Bar: 35, Mai: 19, Sad: 12, Kay: 4 FALSE
## SU1: 24, SU1: 13, SU1: 12, SU1: 9 FALSE
## Sag: 19, Pir: 16, Man: 13, Sad: 10 FALSE
## IP: 73, BT2: 0, GSE: 0, TS3: 0 FALSE
## IP: 73, Oth: 0, NA: 0 FALSE
## NUL: 73, Cen: 0, Nor: 0, Nor: 0 FALSE
## Sum: 68, Spr: 4, Fal: 1, Win: 0 FALSE
## SAG: 19, PIR: 16, MAN: 13, SBN: 10 FALSE
## E.t: 73, K FALSE
## AK: 39, WA: 34, CA: 0, OR: 0 FALSE
##
## ── Variable type:integer ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## marine_common_season 0 73 73 140.75 9.19 114 134 142
## marine_common_year 0 73 73 2015.7 2.28 2009 2014 2016
## marine_sort_order 0 73 73 2496.03 833.39 1720 1720 1740
## season_sequence 0 73 73 1.96 0.26 1 2 2
## size_bin 0 73 73 54.11 28.41 5 30 50
## size_sort_order 0 73 73 6.4 2.87 1 4 6
## total 0 73 73 1.84 1.83 1 1 1
## p75 p100 hist
## 150 150 ▁▁▁▂▃▁▃▇
## 2018 2018 ▁▁▁▂▃▁▃▇
## 3290 3850 ▇▁▁▁▁▃▃▁
## 2 3 ▁▁▁▇▁▁▁▁
## 70 160 ▅▇▆▅▅▁▁▁
## 8 17 ▅▇▆▅▅▁▁▁
## 2 13 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ───────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75
## latitude 0 73 73 52.86 4.5 47.58 48.52 56.99 57.05
## longitude 0 73 73 -129.42 6.4 -135.38 -135.35 -135.32 -122.55
## p100 hist
## 57.05 ▇▁▁▁▁▁▁▇
## -122.52 ▇▁▁▁▁▁▁▇
P.ochra_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot()
K.tuni_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot()
E.tros_only %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot()
That last one looked like the log() function might not be applicable. Let’s see what it looks like without that:
E.tros_only %>% ggplot(aes(marine_common_year, total, group = marine_common_year)) + geom_boxplot()
The most recent and catastrophic sea star wasting disease outbreak hit the west coast of North America around 2013. How did sea star abundance look before the epidemic, how did it change during the epidemic, and have the populations rebounded?
last_decade <- filter(clean_data, marine_common_year >= "2010")
last_decade %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year)) + geom_boxplot()
It looks like populations leveled back out at 2016 and remained relatively unchanged in 2017 and 2018.
It looks like theres a detectable decrease between 2013 and 2014, and that populations leveled back out at 2016 and remained relatively unchanged in 2017 and 2018. Lets narrow the time frame to only include sampling between 2013 and 2016.
last_decade$marine_common_year <- as.factor(last_decade$marine_common_year)
SSWD_timeframe <- filter(clean_data, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe$marine_common_year <- as.factor(SSWD_timeframe$marine_common_year)
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot()
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe %>% ggplot(aes(marine_common_year, log(total), col=state)) + geom_jitter(width = 0.45, alpha = 0.25) +
facet_wrap(~state, 1) + geom_smooth(method = "lm", col="black") + theme(legend.position = "none")
SSWD_timeframe_CA <- filter(SSWD_timeframe, state == "CA")
SSWD_timeframe_CA$marine_common_year <- as.factor(SSWD_timeframe_CA$marine_common_year)
SSWD_timeframe_CA %>% ggplot(aes(marine_common_year, log(total), col=marine_common_year)) + geom_jitter(width = 0.5, alpha = 0.25) + geom_smooth(method = "lm", col="red") + theme(legend.position = "none")
SSWD_timeframe_P.ochra <- filter(P.ochra_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_P.ochra$marine_common_year <- as.factor(SSWD_timeframe_P.ochra$marine_common_year)
SSWD_timeframe_P.ochra %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe_K.tuni <- filter(K.tuni_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_K.tuni$marine_common_year <- as.factor(SSWD_timeframe_K.tuni$marine_common_year)
SSWD_timeframe_K.tuni %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
SSWD_timeframe_E.tros <- filter(E.tros_only, marine_common_year >= "2013" & marine_common_year <= "2016")
SSWD_timeframe_E.tros$marine_common_year <- as.factor(SSWD_timeframe_E.tros$marine_common_year)
SSWD_timeframe_E.tros %>% ggplot(aes(marine_common_year, log(total), group = marine_common_year, col=marine_common_year)) + geom_boxplot() + facet_wrap(~state, 1) + theme(legend.position = "none")
OH! I didn’t catch that the reason why E.tros had such few survey sightings was because its range appears to only be in Alaska. Speaking of which….
Species <- SSWD_timeframe$species_code
broad_jitter <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species), size = 2, alpha = 0.3, width = 3) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-128, -115), ylim = c(32, 49.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
narrow_jitter <- ggplot(data = world) +
geom_sf() +
geom_jitter(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species), size = 2, alpha = 0.3, width = 0.3) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-128, -115), ylim = c(32, 49.5), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
gridExtra::grid.arrange(broad_jitter,narrow_jitter,nrow=2)
## Scale on map varies by more than 10%, scale bar may be inaccurate
## Scale on map varies by more than 10%, scale bar may be inaccurate
AK_zoom1_SSWD <- ggplot(data = world) +
geom_sf() +
geom_point(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species,), size = 2, alpha = 0.4) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-140, -130), ylim = c(55, 62), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
AK_zoom2_SSWD <- ggplot(data = world) +
geom_sf() +
geom_point(data= SSWD_timeframe,aes(x=SSWD_timeframe$longitude, y=SSWD_timeframe$latitude,col=Species,), size = 2, alpha = 0.4) +
annotation_scale(location = "bl", width_hint = 0.3) +
coord_sf(xlim = c(-136, -135), ylim = c(56.6, 57.3), expand = FALSE) +
xlab("Longitude") +
ylab("Latitude")
grid.arrange(AK_zoom1_SSWD, AK_zoom2_SSWD, nrow = 2)
## Scale on map varies by more than 10%, scale bar may be inaccurate